# delimit ; 

************************************ FE*****************************;

clear all; 
set mem 300m;	
set more 1 ;  
drop _all;
program drop _all;
capture log close;

global folder "C:\Dropbox";
         local couples "$folder\hrs\JEP\";
*cd C:\Users\ebf26\Dropbox\hrs\wealthcouples;
cd `couples';
log using impute_Medicaid.log, replace ;
*log using `couples'impute_Medicaid.log, replace ; 
pause off;

set seed 527;
*------------------------------------------------------------;

local varlist age year nurs_ind male oopd black heal_temp faminc_temp lfpr dead hschool somecollege college age2 age3 age4 married nurs_inddead nurs_indoopd deadoopd drdenhosp nursing;

* number of cells to match on: use 200 as a default;
local number_cells = 200;

*use `couples'dataprep0;
use dataprep0;

gen year=realyear+1900;
gen nurs_ind=.;
*for consistency with the MCBS indicator and later state variable;
replace nurs_ind=1 if nursing>=60 & nursing!=.;
replace nurs_ind=0 if nursing<60 & nursing!=.;
rename medicaid medicaid_ind;

*age polynomial;
gen age2=age*age;
gen age3=age2*age;
gen age4=age3*age;

*Generate MCBS consistent education groups;
gen lesshschool=0;
replace lesshschool=inrange(school, 0,11);

gen hschool=0;
replace hschool=1 if school==12;

gen somecollege=0;
replace somecollege=inrange(school,13,15);

gen college=0;
replace college=1 if school==16 | school==17;

//control for missing education;
replace hschool=1 if school==.;

gen heal_temp=0;
replace heal_temp=1 if heal==4|heal==5;
replace heal_temp=1 if dead==1;



//control for missing health in same way as MCBS;

replace heal_temp=1 if heal_temp==.;
*Note: oopd is the same as oop in MCBS;

*Generate family income variable that matches MCBS- this is also generated again dataprep_couples1.do;
gen faminc_temp=.; 
* INCOME;
*******ERIC'S CODING**********************************;
* note:faminc laby + selfy+ capy + peny + othy + socy ;
* laby=labor inc                                      ;
* selfy = self-employment income                      ;
* capy=capital inc                                    ;
* peny=pension inc (from a firm, va),+annuties        ;
* othy= ssi, unemployment, workers comp               ;
* socy=social security/di income                      ;
* RAND CODING ****************************************;
* note:faminc laby + selfy+ capy + peny + othy + socy ;
* laby=labor inc                                      ;
* capy=capital inc AND SELF EMPLOYEMENT INCOME        ;
* peny=pension inc (ONLY FROM FIRMS, NOT THE VA),+annuties;
* socy=social security ONLY (NO DI INCOME)            ;
* SSDIY= SSI and DI                                   ;
* UNEMPY=unemployment and workers compensation        ;
* OTHgovY= other government, such as veterans benefits, welfare, food stamps;
* OTHincY=inheritance, lump sum pension, alimony      ;
******************************************************;
if useRAND==0{;
gen anninc= socy+peny; * annuitized income=peny+othy;
gen hhinc=anninc+laby+selfy; * include labor income;
replace faminc_temp=hhinc+capy +othy; * include capital income, welfare, SSI;
};


if useRAND==1{;
gen anninc=socy+peny +VAinc; * Note: took out SSDIY and welfare, because we model the consumption floor from SSI/welfare payments explicitly, but we want veteran's benefits in there;
* gen anninc=socy+peny+SSDIY+UNEMPY+OTHgovY; * Old version;
*replace anninc=anninc+OTHincY;
gen hhinc=anninc+laby; *add in SSDIY here ;
replace faminc_temp=hhinc+capy +UNEMPY +SSDIY; * include capital income, welfare, SSI, SSDI;

};


gen lowlev=1000;
replace faminc_temp=lowlev if faminc_temp<lowlev & faminc_temp~=.;

replace faminc_temp=log(faminc_temp);

*Feed forardswards last period if missing;
sort HHID PN wave;
replace faminc_temp=faminc_temp[_n-1] if HHID==HHID[_n-1] & PN==PN[_n-1] & faminc_temp==. & faminc_temp[_n-1]~=.;
replace lfpr=lfpr[_n-1] if HHID==HHID[_n-1] & PN==PN[_n-1] & lfpr==. & lfpr[_n-1]~=.;
replace nurs_ind=nurs_ind[_n-1] if HHID==HHID[_n-1] & PN==PN[_n-1] & nurs_ind==. & nurs_ind[_n-1]~=.;


* check that everyone has their spouses income;
sort HHID wave PN;
replace faminc=faminc[_n-1] if HHID==HHID[_n-1] & wave==wave[_n-1] & faminc[_n-1]~=.;

//control for missing in work in the same way as MCBS;
replace lfpr=0 if lfpr==.;

//control for missing married in the same way as MCBS;
replace married=0 if married==.;

//control for missing race info in the same way as MCBS;
replace black=0 if black==.;



gen drvisit=0;
replace drvisit=1 if drtimes>0 &drtimes!=.;

gen hospstay=0;
replace hospstay=1 if hosp>0 &hosp!=.;


gen drdenhosp=0;
replace drdenhosp=max(drvisit,hospstay,dentist);

gen nurs_inddead=nurs_ind*dead;
gen nurs_indoopd=nurs_ind*oopd;
gen deadoopd=dead*oopd;

sum `varlist';


gen medicaid=.;
append using impute\impute_coeffs2yr.dta;
*append using `couples'impute\impute_coeffs.dta;

rename bheal bheal_temp;
gen medicaidp=0;
sum b0, meanonly;
replace medicaidp=r(mean);

drop b0;

*pause on;
sort dead;
by dead: sum medicaidp;

sum `varlist';
pause;

foreach var in `varlist'{;
local var2 b`var';
sum `var2', meanonly;
local coeff=r(mean);

replace medicaidp=`coeff' *`var'+medicaidp;
*by dead: sum `var2';
*by dead: sum `var';
*by dead: sum medicaidp;
pause;
drop `var2';
};
*
replace medicaidp=0 if medicaid~=1;

*eliminate the row of data associated with appended coefficients;
gen donor=0;


append using impute\impute_donors2yr.dta;
*append using `couples'impute\impute_donors.dta;

//generate for both the recipients and odnors (no missing by construction);
gen medicaidpmiss=0;
replace medicaidpmiss=1 if medicaidp==.;
 
***********************************************************
* deciles are based on all xB in the MCBS who meet the match criteria
***********************************************************;

//Generate pctiles from donor distribution;

pctile store_pctile=medicaidp if donor==1, nquantiles(`number_cells') genp(percent_medicaidp);  

xtile sort_group=medicaidp, cutpoints(store_pctile);

gen uniform=1/`number_cells';
twoway (histogram sort_group, width(1) fraction) (line uniform sort_group);

twoway (histogram sort_group if medicaid_ind==1 & medcost~=. &age>=65, width(1) fraction) (line uniform sort_group);

drop uniform;
*This means we have the total number of cells defined from the donor distribution and;
*applied to both donors and recipients;

sum `varlist' if wave==3;

sum `varlist' if wave==4;

sum `varlist' if wave==5;


***********************************************************
*  Two groups are found within our decile, the "Donor" group
*  (who have a value for Medicaid) and
*  a recipient group (who are in the right decile).
*  The donor group's size is stored in "max" while the 
*  recipient group's size is stored in "max2".  Each memeber
*  of the decile has sortflag = 0 while each member of the 
*  decile that is a donor has sortflag2 = 0.
*  The loop then assigns everyone in the decile a random draw
*  from the donor group.
***********************************************************;

gen residmatch = .;
gen sortflag = .;

qui sum medicaidpmiss if medicaidpmiss==0 & donor==0;
di as text "Total number to impute " as result r(N);

//Loop over the number of groups;
forvalues quantile_count=1(1)`number_cells'{;

replace sortflag=1; //select the group of interest if sortflag is 0 (due to lowest first sort);
replace sortflag=0 if sort_group==`quantile_count'; //select the correct quantile

qui sum medicaidp if sortflag==0 & donor==1;
local max = r(N); *Donor group;


qui sum medicaidp if sortflag==0 & donor==0;
local max2 = r(N); *recipient group;

local i = 1;


sort medicaidpmiss sortflag donor HHID; //medicaidpmiss gives the sortgroup first varying by recipient then donor status then ID within;


display as text "Number of MCBS donors " as result `max' ;
display as text "Number of HRS recipients " as result `max2';

while `i' <= `max2' {;
	//because donors come first count on from recipients
  local i2 =`max2'+ceil(uniform()*`max'); *from max2+1 all the way up to max2+max;
  qui replace residmatch = residsmt[`i2'] in `i';
  local i = `i' + 1;
};
*close on the while;

};
*close on the nuber of cells;


*************************************************
*  Finally, values are imputed.
*************************************************;

gen medicaidpp=medicaidp+residmatch;
sum medicaidpp medicaid;

sum medicaidpp medicaid if dead==0;


replace medicaid=medicaidpp if medicaid==. & medicaid_ind==1;
replace medicaid=0 if medicaid==. & medicaid_ind==0;

drop if donor==1; //drop the non-HRS data;
drop donor sortflag medicaidp medicaidpp residsmt residmatch sort_group store_pctile percent_medicaidp ;

sum medicaid;

sum medicaid if medicaid_ind==1;

drop anninc hhinc faminc_temp age2 age3 age4;
drop lowlev lesshschool hschool somecollege college nurs_ind year heal_temp;
drop drvisit hospstay drdenhosp nurs_inddead deadoopd nurs_indoopd;

rename medicaid medicaid_pay;
rename medicaid_ind medicaid;

* a few people have negative imputed medicaid payments--set these to 0.  this only changes the mean by about 3%;
replace medicaid_pay=0 if medicaid_pay<0;

save `couples'dataprep0_impute, replace;
*save dataprep0_impute, replace;

drop _all;
program drop _all;
log close;
